* WITH ONLY NARCISSISTS TO CHOOSE FROM -- replication do-file
* The data set is opened by relative path, so point cd at the folder that holds it.
cd "insert working directory here"
use "replicationdata.dta", clear
eststo clear

* Table 1: Narcissism and Running for Political Office

* Model 1: logit of runforoffice_pastyear on sins_dum only, robust standard errors
eststo: logit runforoffice_pastyear sins_dum, vce(robust)

* Figure 1: Distribution of the Single Item Narcissism Scale (SINS)
* Bars are percentages within the Model 1 estimation sample (e(sample) from the logit above).
graph bar (percent) if e(sample), ///
    over(sins_cont, lab(angle(40) labsize(vsmall))) ///
    blabel(total, format(%4.1f)) ///
    b1title("To what extent do you agree with this statement: I am a narcissist.") ///
    graphregion(col(white)) ///
    ytitle("Percentage")

* Full-covariate logit, not stored with eststo: it is run so that e(sample) marks the
* observations used when the continuous variables are standardized further down.
logit runforoffice_pastyear sins_dum age female white black hisp ///
    income educ liberal conser participation_sum, vce(robust)


*Standardizing the continuous variables by mean-centering and dividing by two standard deviations
*Means and SDs are taken over e(sample) of the most recent estimation command.
*The divisor has to be parenthesised: Stata evaluates the division and multiplication
*operators left to right, so x/2*(sd) is (x/2)*sd, which multiplies by the SD rather than
*dividing by two SDs. Compared with that coding each variable below is rescaled by the
*constant 1/sd^2, so its coefficient changes by the factor sd^2 while z-statistics and
*p-values stay the same.
sum sins_cont if e(sample)
gen sins_cont_standardized=(sins_cont-r(mean))/(2*r(sd))

sum income if e(sample)
gen income_standardized=(income-r(mean))/(2*r(sd))

sum age if e(sample)
gen age_standardized=(age-r(mean))/(2*r(sd))

sum educ if e(sample)
gen educ_standardize=(educ-r(mean))/(2*r(sd))

sum participation_sum if e(sample)
gen participation_sumstandard=(participation_sum-r(mean))/(2*r(sd))

*Model 2
eststo: logit runforoffice_pastyear sins_dum  age_standardized female white black hisp income_standardized educ_standardize participation_sumstandard liberal conser , vce(robust) 

*Model 3
eststo:  logit runforoffice_pastyear sins_cont_standardized, vce(robust) 

*Model 4
eststo: logit runforoffice_pastyear sins_cont_standardized age_standardized female white black hisp income_standardized educ_standardize participation_sumstandard liberal conser, vce(robust)

*Figure 2: The Effect of Narcissism on the Propensity to Run for Office
*The prediction grid runs over the observed range of the standardized score in the Model 4
*sample, in steps of .1, instead of hard-coded endpoints that are only valid for one scaling.
*summarize is r-class, so the Model 4 results in e() are still in place for margins.
*The two locals must be defined in the same run as the margins call.
sum sins_cont_standardized if e(sample)
local sins_min=r(min)
local sins_max=r(max)
margins, at(sins_cont_standardized=(`sins_min'(.1)`sins_max'))
*No xscale(range()) here: the x axis is left to fit the grid computed above.
marginsplot, xlab(  , labsize(small))  ylab(,labsize(small)) graphregion(color(white)) title("") ytitle("Pr(Running for office=1)", size(small)) xtitle("SINS Score",size(small)) title("", size(small)) name(g1,replace)

*Table 1 export: Models 1-4 stored by eststo; replace starts analysis.rtf afresh on every run
esttab using analysis.rtf, replace b(3) se(3) lines star(* .05 ** .01 *** .001) compress label title("Table 1: Running for Office and SINS") 

**************************
*Supplementary Information
**************************
eststo clear

*Table S1: Cross-tabulation of SINS and Running for Office in the Past
tab sins_cont runforoffice_pastyear,col 

*Table S2: Descriptive Statistics
*Model 2 of Table 1 is re-run only so that e(sample) marks the regression sample.
logit runforoffice_pastyear sins_dum  age_standardized female white black hisp income_standardized educ_standardize participation_sumstandard liberal conser , vce(robust)

*Summary statistics in the sample used in the regression.
*Both asdoc calls in this section write to asdoc's output document (no save() option is
*given, so the asdoc default file name applies). replace on this first call starts a fresh
*document; the second call appends, so Table S2 is no longer overwritten by Table S3.
asdoc sum  runforoffice_pastyear sins_dum sins_cont sins_cont_standardized income age female white black hisp educ participation_sum liberal conser  if e(sample), replace

*Table S3: Pairwise Correlations
*NOTE(review): unlike Table S2 this is not restricted to e(sample) - confirm that is intended.
asdoc  pwcorr runforoffice_pastyear sins_dum sins_cont sins_cont_standardized income age female white black hisp educ participation_sum liberal conser, star(all) setstars(***@.01, **@.05, *@.1) nonum append

eststo clear

*Table S4: Probit and Rare Events Logit Results
*Both models use the Model 4 (Table 1) covariate list.
//Probit
eststo: probit runforoffice_pastyear sins_cont_standardized age_standardized female white black hisp income_standardized educ_standardize participation_sumstandard liberal conser, vce(robust) //Model 1

//rare events logit (relogit is a user-written command and must be installed)
eststo: relogit runforoffice_pastyear sins_cont_standardized age_standardized female white black hisp income_standardized educ_standardize participation_sumstandard liberal conser //Model 2
*append, not replace: analysis.rtf already holds Table 1, which replace would overwrite.
esttab using analysis.rtf, append b(3) se(3) lines star(* .05 ** .01 *** .001) compress label title("Table S4: Probit and Rare Events Logistic Regression Results") 

*Figure S1: Distribution of Running for Political Office 
*The logit is run only to set e(sample); the bar chart is then restricted to that sample.
logit runforoffice_pastyear sins_dum age female white black hisp income educ liberal conser participation_sum, vce(robust)

*y-axis title corrected from the misspelling "Pencentage"
graph bar (percent) if e(sample), over(runforoffice_pastyear,lab(angle(40) labsize(vsmall))) blabel(total, format(%4.1f)) b1title("Have you ever run for elective office at any level of government (local, state or federal)?") title("") graphregion(col(white)) ytitle("Percentage")

*Figure S2: Running for Office and SINS Score Dummy (Model 2 Table 1)
eststo clear

*Model 2 of Table 1 is estimated again so that margins works from its results
eststo: logit runforoffice_pastyear sins_dum age_standardized female white black hisp ///
    income_standardized educ_standardize participation_sumstandard liberal conser, ///
    vce(robust)

*Predicted probability of running for office at sins_dum = 0 and sins_dum = 1
margins, at(sins_dum=(0 1))

*Point plot of the two predictions. The graph is stored under the name g1 with replace,
*so any graph already held in memory under that name is overwritten.
marginsplot, recast(scatter) ///
    xscale(range(-0.5 1.5)) ///
    xlab(  0 "SINS Score=0" 1"SINS Score=1" , labsize(small)) ///
    ylab(,labsize(small)) ///
    graphregion(color(white)) ///
    title("") ///
    ytitle("Pr(Running for office=1)", size(small)) ///
    xtitle("") ///
    title("", size(small)) ///
    name(g1,replace)

*Calculating Cook's Distance to Investigate Influential Observations
eststo clear
*preserve/restore: the drop and the helper variables created below are undone at restore
preserve
*Model 4 covariates in their raw (unstandardized) form, estimated without vce(robust)
eststo: logit runforoffice_pastyear sins_cont age female white black hisp income educ liberal conser participation_sum

*Observation number within the estimation sample (missing outside it).
*It follows the current sort order of the data, so the numbers used in the drop below
*are only valid for the data set in the order in which it was loaded.
generate index=_n if e(sample)
label variable index "Observation Number"

*Influence statistic from predict with the dbeta option after logit
predict cook, dbeta
label variable cook "Cook Distance"

*Figure S3
graph twoway (scatter cook index, msymbol(d) mlabel(index) mcolor(gs9)), title("") xtitle("Observation Number") ytitle("Cook's Distance") 

drop if index==582 | index==788 | index==764
*We drop 3 influential observations and estimate the model without them:
*NOTE(review): this specification also leaves out white black hisp, which are in the model
*above - confirm whether that is deliberate before comparing the two columns.
eststo: logit runforoffice_pastyear sins_cont age female income educ liberal conser participation_sum  
*append, not replace: analysis.rtf already holds the earlier tables, which replace would overwrite.
esttab using analysis.rtf, append b(3) se(3) lines star(* .05 ** .01 *** .001) compress label title("Table S5: Omitting Influential Observations") 

restore
***********
